clear all 
set more off 
set maxvar 15000 
clear matrix


**********************************************************************************************************************************************
**********************************************************************************************************************************************
**** IGE VS RANK OVER TIME
**********************************************************************************************************************************************
**********************************************************************************************************************************************
    
	use "$Mydirectory1/3_Output/2_PooledData_analysis.dta", clear 
    keep if baseline_sample==1
    
    gen yhat_father_levels = exp(log_father_baseline)

    forval i=1(1)4 {
        gen est_`i'=. 
        gen est_lb_`i' =.
        gen est_ub_`i' =.
    }
    
    levelsof decade, local(decades) 
    
* 1a. Traditional IGE
    foreach x of local decades {
        di "`x'"
        quietly reg log_son_baseline log_father_baseline if decade==`x' [pw=wgt_sex_race], robust 
            replace est_1 = _b[log_father_baseline] if decade==`x'
            replace est_ub_1 = _b[log_father_baseline]+1.96*_se[log_father_baseline] if decade==`x'
            replace est_lb_1 = _b[log_father_baseline]-1.96*_se[log_father_baseline] if decade==`x'
    }

*1b. Approximate IGE using levels 
    foreach x of local decades {
        di "`x'"
        
        sum yhat_father_levels if decade==`x'
        local e_yp = `r(mean)'
        sum fam_inc_real if decade==`x'
        local e_yc = `r(mean)'
        local ratio1 = `e_yp'/`e_yc'
        display `ratio1'
        
        quietly reg fam_inc_real yhat_father_levels if decade==`x' [pw=wgt_sex_race], robust 
            
        lincom yhat_father_levels * `ratio1'
        
            replace est_2 = `r(estimate)' if decade==`x'
            replace est_ub_2 = `r(estimate)'+1.96*`r(se)' if decade==`x'
            replace est_lb_2 = `r(estimate)'-1.96*`r(se)' if decade==`x'
                
    }
        
* 2a. Traditional rank-rank 
    foreach x of local decades {
        di "`x'"
        quietly reg rank_son_baseline rank_father_baseline if decade==`x' [pw=wgt_sex_race], robust  
            replace est_3 = _b[rank_father_baseline] if decade==`x'
            replace est_ub_3 = _b[rank_father_baseline]+1.96*_se[rank_father_baseline] if decade==`x'
            replace est_lb_3 = _b[rank_father_baseline]-1.96*_se[rank_father_baseline] if decade==`x'
    }
        
* 2b. Approximate IGC using levels
    foreach x of local decades {    
    
        quietly sum yhat_father_levels if decade==`x', d
        local var_yp = `r(Var)'
        quietly sum fam_inc_real if decade==`x', d
        local var_yc = `r(Var)'
        local ratio1 = sqrt(`var_yp'/`var_yc')
        
        local numb0 = 6/c(pi) 
        
        quietly reg fam_inc_real yhat_father_levels if decade==`x' [pw=wgt_sex_race], robust 
            
        nlcom (`numb0'*asin((_b[yhat_father_levels] * `ratio1')/2)), post //formula for approximating rank-rank using (6/pi)*arcsin(\beta_IGC / 2)
        
        mat C1 = e(b)
        scalar coeff1 = C1[1,1]
        mat C2 = e(V)
        scalar temp = C2[1,1] //variance
        scalar se1 = sqrt(temp)
    
        replace est_4 = coeff1 if decade==`x'
        replace est_ub_4 = coeff1+1.96*se1 if decade==`x'
        replace est_lb_4 = coeff1-1.96*se1 if decade==`x'
                
    }
        
    bysort decade: keep if _n==1
    keep decade est_*
    
    reshape long est_ est_lb_ est_ub_, i(decade) j(estimate)
    replace decade= decade+1.5 if estimate==2
    replace decade= decade+3 if estimate==3
    replace decade= decade+4.5 if estimate==4
    
* Figure
    #delimit ;
    twoway (scatter est_ decade if estimate==1,  msymbol(circle) mcolor(midblue) msize(medium) yaxis(1)) 
           (rcap est_lb_  est_ub_  decade if estimate==1, lpatter(solid) lcolor(midblue) yaxis(1) lwidth(0.5))
           (scatter est_ decade if estimate==2,  msymbol(triangle) mcolor(navy) msize(small) yaxis(1)) 
           (rcap est_lb_  est_ub_  decade if estimate==2, lpatter(solid) lcolor(navy) yaxis(1) lwidth(0.5) )
           
           (scatter est_ decade if estimate==3,  msymbol(diamond) mcolor(orange) msize(small) yaxis(2)) 
           (rcap est_lb_  est_ub_  decade if estimate==3, lpatter(solid) lcolor(orange) yaxis(2) lwidth(0.5) )
                   (scatter est_ decade if estimate==4,  msymbol(diamond) mcolor(cranberry) msize(small) yaxis(2)) 
           (rcap est_lb_  est_ub_  decade if estimate==4, lpatter(solid) lcolor(cranberry) yaxis(2) lwidth(0.5) )
           ,
    xti(" " "Decade of respondent's birth") xlabel(1910(10)1970) xscale(range(1905 1975))
    ylabel(0(.25)1, axis(1)) yti("IGE" " ", axis(1)) 
    legend(on ring(0) row(4) pos(8) order(1 "IGE" 3 "IGE approximation" 5 "Rank-rank correlation" 7 "Rank-rank approximation"))
    ylabel(0.15(.1)0.45, axis(2)) yti(" " "Rank-rank", axis(2))
    xlabel(1910 "1910s" 1920 "1920s" 1930 "1930s" 1940 "1940s" 1950 "1950s" 1960 "1960s" 1970 "1970s", labsize(small) ) ;  
    #delimit cr
    graph export "$Mydirectory2/appendix_d/Recovering_measures_using_linear.pdf", as(pdf) replace
